# Read the cluster JSON, keep the `countries$USA` entry and reshape it to long
# format: one row per (date, variant) holding the sequence count and its share
# of `total_sequences` as a percentage rounded to two decimals.
USAClusters <- fromJSON(file = "data/USAClusters_data.json")$countries$USA %>%
  as.data.frame() %>%
  rename(date = week) %>%
  mutate(date = ymd(date)) %>%
  arrange(date) %>%
  pivot_longer(
    cols = -c(date, total_sequences),
    names_to = "variant",
    values_to = "sequences"
  ) %>%
  mutate(perc_sequences = round(sequences / total_sequences * 100, 2))

# Print the reshaped table for inspection.
USAClusters
# Summary-statistics table for the `covid` data frame.
sumtable(covid)

# Missing-value counts per location and variable, in long format
# (columns: location, Variable, NAs, Percent), sorted with the largest
# counts first.
# `summarise(across(...))` replaces the deprecated `summarise_all(funs(...))`.
# NOTE(review): `Percent` divides by the row count of the WHOLE data set, not
# by the number of rows of each location -- confirm this is the intended
# denominator.
covid_NAs <- covid %>%
  group_by(location) %>%
  summarise(across(everything(), ~ sum(is.na(.x)))) %>%
  pivot_longer(cols = -location, names_to = "Variable", values_to = "NAs") %>%
  mutate(Percent = round(NAs / nrow(covid) * 100, 2)) %>%
  arrange(desc(NAs))

# Print the NA summary for inspection.
covid_NAs
# Interactive table of `covid_NAs` with per-column filter boxes on top.
# The call previously ended in a dangling comma, which passed an empty
# argument to `datatable()`; it is removed here.
# Searching on a column can be disabled through `options`, e.g.
#   options = list(columnDefs = list(list(targets = 1, searchable = FALSE)))
DT::datatable(covid_NAs, filter = "top")
# Sum the `Percent` column per location, sort ascending by that total and show
# the result as an interactive table with filter boxes on top.
covid_NAs %>%
  group_by(location) %>%
  summarise(total_pct_na = sum(Percent)) %>%
  arrange(total_pct_na) %>%
  DT::datatable(filter = "top")
# List the column names of `covid`.
colnames(covid)
 [1] "iso_code"                                   "continent"                                 
 [3] "location"                                   "date"                                      
 [5] "total_cases"                                "new_cases"                                 
 [7] "new_cases_smoothed"                         "total_deaths"                              
 [9] "new_deaths"                                 "new_deaths_smoothed"                       
[11] "total_cases_per_million"                    "new_cases_per_million"                     
[13] "new_cases_smoothed_per_million"             "total_deaths_per_million"                  
[15] "new_deaths_per_million"                     "new_deaths_smoothed_per_million"           
[17] "reproduction_rate"                          "icu_patients"                              
[19] "icu_patients_per_million"                   "hosp_patients"                             
[21] "hosp_patients_per_million"                  "weekly_icu_admissions"                     
[23] "weekly_icu_admissions_per_million"          "weekly_hosp_admissions"                    
[25] "weekly_hosp_admissions_per_million"         "total_tests"                               
[27] "new_tests"                                  "total_tests_per_thousand"                  
[29] "new_tests_per_thousand"                     "new_tests_smoothed"                        
[31] "new_tests_smoothed_per_thousand"            "positive_rate"                             
[33] "tests_per_case"                             "tests_units"                               
[35] "total_vaccinations"                         "people_vaccinated"                         
[37] "people_fully_vaccinated"                    "total_boosters"                            
[39] "new_vaccinations"                           "new_vaccinations_smoothed"                 
[41] "total_vaccinations_per_hundred"             "people_vaccinated_per_hundred"             
[43] "people_fully_vaccinated_per_hundred"        "total_boosters_per_hundred"                
[45] "new_vaccinations_smoothed_per_million"      "new_people_vaccinated_smoothed"            
[47] "new_people_vaccinated_smoothed_per_hundred" "stringency_index"                          
[49] "population"                                 "population_density"                        
[51] "median_age"                                 "aged_65_older"                             
[53] "aged_70_older"                              "gdp_per_capita"                            
[55] "extreme_poverty"                            "cardiovasc_death_rate"                     
[57] "diabetes_prevalence"                        "female_smokers"                            
[59] "male_smokers"                               "handwashing_facilities"                    
[61] "hospital_beds_per_thousand"                 "life_expectancy"                           
[63] "human_development_index"                    "excess_mortality_cumulative_absolute"      
[65] "excess_mortality_cumulative"                "excess_mortality"                          
[67] "excess_mortality_cumulative_per_million"   
# Peek at the first few values of the `date` column.
covid %>% pull(date) %>% head()
[1] "2020-02-24" "2020-02-25" "2020-02-26" "2020-02-27" "2020-02-28" "2020-02-29"
# Keep only the "United States" rows of `covid` and attach the variant data by
# date. Rows of `covid` without a matching date in `USAClusters` are kept with
# NA variant columns (left join).
us <- covid %>%
  filter(location == "United States") %>%
  left_join(USAClusters, by = "date")
# Vertical reference lines are currently disabled on all three plots below.
# To restore one, add a layer such as
#   geom_vline(aes(xintercept = ymd(20200706)), color = "black")
# Dates previously used: 20200706, 20210517, 20211004, 20220105.

# Percentage of sequences over time, one coloured line per variant (no legend).
variants_plot <- ggplot(data = us, mapping = aes(x = date)) +
  geom_line(
    mapping = aes(y = perc_sequences, color = variant),
    show.legend = FALSE
  ) +
  theme_minimal()

# New cases and new deaths per million over time, drawn on the same axis.
cases_plot <- ggplot(data = us, mapping = aes(x = date)) +
  geom_line(mapping = aes(y = new_cases_per_million), show.legend = FALSE) +
  geom_line(mapping = aes(y = new_deaths_per_million)) +
  theme_minimal()

# New deaths per million over time.
deaths_plot <- ggplot(data = us, mapping = aes(x = date)) +
  geom_line(mapping = aes(y = new_deaths_per_million)) +
  theme_minimal()
# Convert the variant plot to an interactive plotly widget.
ggplotly(variants_plot)
# Print the static cases plot.
cases_plot
Warning: Removed 1 row(s) containing missing values (geom_path).
Warning: Removed 38 row(s) containing missing values (geom_path).

# Print the static deaths plot.
deaths_plot
Warning: Removed 38 row(s) containing missing values (geom_path).

# People vaccinated per hundred over time.
# Vertical reference lines are currently disabled; to restore one, add e.g.
#   geom_vline(aes(xintercept = ymd(20200706)), color = "black")
# Dates previously used: 20200706, 20210517, 20211004, 20220105.
ggplot(data = us, mapping = aes(x = date, y = people_vaccinated_per_hundred)) +
  geom_line() +
  theme_minimal()
Warning: Removed 695 row(s) containing missing values (geom_path).

# Latest date present in `covid`. `max()` is used instead of taking the last
# element so the result does not depend on how the rows happen to be ordered.
max(covid$date, na.rm = TRUE)
[1] "2022-03-29"
# Date in the final row of `USAClusters`.
tail(USAClusters$date, n = 1)
[1] "2022-02-21"
# Largest observed value of new cases per million in `us`, ignoring NAs.
us %>% pull(new_cases_per_million) %>% max(na.rm = TRUE)
[1] 4151.539
# Variant shares (left axis, percent) overlaid with new cases per million
# (right axis).
# A ggplot2 secondary axis is only a rescaling of the primary axis, so the
# case series is divided by `cases_scale` before plotting and the secondary
# axis multiplies it back. Both uses must share the same factor.
cases_scale <- 40

# `position = "identity"` draws the variant areas overlapping (hence the
# alpha). The previous "dodge" position has no width for an area geom and only
# produced a "Width not defined" warning.
p <- ggplot(us, aes(x = date)) +
  geom_area(
    aes(y = perc_sequences, color = variant, fill = variant),
    alpha = 0.5,
    position = "identity"
  ) +
  geom_line(aes(y = new_cases_per_million / cases_scale)) +
  scale_y_continuous(
    "Percent of Sequences",
    sec.axis = sec_axis(~ . * cases_scale, name = "New Cases Per Million")
  ) +
  theme_minimal() +
  labs(title = "Proportion of Covid Variants vs New Cases Per Million")

# Render the combined plot as an interactive plotly widget.
ggplotly(p)
Warning: Width not defined. Set with `position_dodge(width = ?)`
LS0tDQp0aXRsZTogIkNPVklELTE5IERhdGFzZXQgQW5hbHlzaXMiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7ciwgZWNobyA9IEZBTFNFLCBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KHNjYWxlcykNCmxpYnJhcnkocGxvdGx5KQ0KbGlicmFyeShEVCkNCmxpYnJhcnkoZGF0YS50YWJsZSkNCmxpYnJhcnkoZ3RhYmxlKQ0KbGlicmFyeShwbG90bHkpDQpsaWJyYXJ5KGx1YnJpZGF0ZSkNCmxpYnJhcnkodnRhYmxlKQ0KbGlicmFyeShyanNvbikNCmBgYA0KDQpgYGB7ciwgZWNobyA9IEZBTFNFfQ0KY292aWQgPC0gcmVhZF9jc3YoImRhdGEvb3dpZC1jb3ZpZC1kYXRhLmNzdiIsc2hvd19jb2xfdHlwZXMgPSBGQUxTRSkNCnZhcmlhbnRzIDwtIHJlYWRfY3N2KCJkYXRhL2NvdmlkLXZhcmlhbnRzLmNzdiIsc2hvd19jb2xfdHlwZXMgPSBGQUxTRSkNCmBgYA0KDQpgYGB7cn0NClVTQUNsdXN0ZXJzIDwtIGZyb21KU09OKGZpbGUgPSAiZGF0YS9VU0FDbHVzdGVyc19kYXRhLmpzb24iKQ0KVVNBQ2x1c3RlcnMgPC0gVVNBQ2x1c3RlcnMkY291bnRyaWVzJFVTQQ0KVVNBQ2x1c3RlcnMgPC0gYXMuZGF0YS5mcmFtZShVU0FDbHVzdGVycykgJT4lIA0KICBtdXRhdGUod2VlayA9IHltZCh3ZWVrKSkgJT4lIA0KICBhcnJhbmdlKHdlZWspICU+JSANCiAgcGl2b3RfbG9uZ2VyKGNvbHMgPSAtYyh3ZWVrLCB0b3RhbF9zZXF1ZW5jZXMpLCBuYW1lc190byA9ICJ2YXJpYW50IiwgdmFsdWVzX3RvID0gInNlcXVlbmNlcyIpICU+JSANCiAgbXV0YXRlKHBlcmNfc2VxdWVuY2VzID0gcm91bmQoc2VxdWVuY2VzIC8gdG90YWxfc2VxdWVuY2VzICogMTAwLCAyKSkgJT4lIA0KICByZW5hbWUoZGF0ZSA9IHdlZWspDQoNClVTQUNsdXN0ZXJzDQpgYGANCg0KDQpgYGB7cn0NCnN1bXRhYmxlKGNvdmlkKQ0KYGBgDQoNCg0KYGBge3J9DQpjb3ZpZF9OQXMgPC0gY292aWQgJT4lIA0KICBncm91cF9ieShsb2NhdGlvbikgJT4lIA0KICBzdW1tYXJpc2VfYWxsKGZ1bnMoc3VtKGlzLm5hKC4pKSkpICU+JSANCiAgcGl2b3RfbG9uZ2VyKGNvbHMgPSAtbG9jYXRpb24sIG5hbWVzX3RvID0gIlZhcmlhYmxlIiwgdmFsdWVzX3RvID0gIk5BcyIpICU+JSANCiAgbXV0YXRlKFBlcmNlbnQgPSByb3VuZChOQXMgLyBucm93KGNvdmlkKSAqIDEwMCAsMikpICU+JSANCiAgYXJyYW5nZSgtTkFzKQ0KDQpjb3ZpZF9OQXMNCmBgYA0KDQpgYGB7cn0NCkRUOjpkYXRhdGFibGUoDQogIGNvdmlkX05BcywgZmlsdGVyID0gJ3RvcCcsDQogICNvcHRpb25zID0gbGlzdCgNCiAjICAgY29sdW1uRGVmcyA9IGxpc3QobGlzdCh0YXJnZXRzID0gMSwgc2VhcmNoYWJsZSA9IEZBTFNFKSkNCiAgIykNCikNCmBgYA0KDQpgYGB7cn0NCmNvdmlkX05BcyAlPiUgDQogIGdyb3VwX2J5KGxvY2F0aW9uKSAlPiUgDQogIHN1bW1hcmlzZSh0b3RhbF9wY3RfbmEgPSBzdW0oUGVyY2VudCkpICU+JSANCiAgYXJyYW5nZSh0b3RhbF9w
Y3RfbmEpICU+JSANCiAgZGF0YXRhYmxlKGZpbHRlciA9ICd0b3AnKQ0KYGBgDQoNCmBgYHtyfQ0KY292aWQgJT4lIA0KICBjb2xuYW1lcygpDQpgYGANCg0KYGBge3J9DQpoZWFkKGNvdmlkJGRhdGUpDQpgYGANCiANCmBgYHtyfQ0KdXMgPC0gY292aWQgJT4lIA0KICBmaWx0ZXIobG9jYXRpb24gPT0gIlVuaXRlZCBTdGF0ZXMiKSANCg0KdXMgPC0gbGVmdF9qb2luKHVzLCBVU0FDbHVzdGVycywgYnkgPSAiZGF0ZSIpDQpgYGANCg0KYGBge3J9DQp2YXJpYW50c19wbG90IDwtIHVzICU+JSANCiAgZ2dwbG90KGFlcyh4ID0gZGF0ZSkpICsNCiAgZ2VvbV9saW5lKGFlcyh5ID0gcGVyY19zZXF1ZW5jZXMsIGNvbG9yID0gdmFyaWFudCksIHNob3cubGVnZW5kID0gRkFMU0UpICsNCiAjIGdlb21fdmxpbmUoYWVzKHhpbnRlcmNlcHQgPSB5bWQoMjAyMDA3MDYpKSwgY29sb3IgPSAiYmxhY2siKSArDQogIyBnZW9tX3ZsaW5lKGFlcyh4aW50ZXJjZXB0ID0geW1kKDIwMjEwNTE3KSksIGNvbG9yID0gImJsYWNrIikgKyANCiAjIGdlb21fdmxpbmUoYWVzKHhpbnRlcmNlcHQgPSB5bWQoMjAyMTEwMDQpKSwgY29sb3IgPSAiYmxhY2siKSArIA0KICAjICBnZW9tX3ZsaW5lKGFlcyh4aW50ZXJjZXB0ID0geW1kKDIwMjIwMTA1KSksIGNvbG9yID0gImJsYWNrIikgKyANCiAgdGhlbWVfbWluaW1hbCgpDQoNCmNhc2VzX3Bsb3QgPC0gdXMgJT4lIA0KICBnZ3Bsb3QoYWVzKHggPSBkYXRlKSkgKw0KICBnZW9tX2xpbmUoYWVzKHkgPSBuZXdfY2FzZXNfcGVyX21pbGxpb24pLCBzaG93LmxlZ2VuZCA9IEZBTFNFKSArDQogIGdlb21fbGluZShhZXMoeSA9IG5ld19kZWF0aHNfcGVyX21pbGxpb24pKSArIA0KICMgZ2VvbV92bGluZShhZXMoeGludGVyY2VwdCA9IHltZCgyMDIwMDcwNikpLCBjb2xvciA9ICJibGFjayIpICsNCiAjIGdlb21fdmxpbmUoYWVzKHhpbnRlcmNlcHQgPSB5bWQoMjAyMTA1MTcpKSwgY29sb3IgPSAiYmxhY2siKSArIA0KICMgZ2VvbV92bGluZShhZXMoeGludGVyY2VwdCA9IHltZCgyMDIxMTAwNCkpLCBjb2xvciA9ICJibGFjayIpICsgDQogICMgIGdlb21fdmxpbmUoYWVzKHhpbnRlcmNlcHQgPSB5bWQoMjAyMjAxMDUpKSwgY29sb3IgPSAiYmxhY2siKSArIA0KICB0aGVtZV9taW5pbWFsKCkNCg0KZGVhdGhzX3Bsb3QgPC0gdXMgJT4lIA0KICBnZ3Bsb3QoYWVzKHggPSBkYXRlKSkgKw0KICBnZW9tX2xpbmUoYWVzKHkgPSBuZXdfZGVhdGhzX3Blcl9taWxsaW9uKSkgKyANCiAjIGdlb21fdmxpbmUoYWVzKHhpbnRlcmNlcHQgPSB5bWQoMjAyMDA3MDYpKSwgY29sb3IgPSAiYmxhY2siKSArDQojZ2VvbV92bGluZShhZXMoeGludGVyY2VwdCA9IHltZCgyMDIxMDUxNykpLCBjb2xvciA9ICJibGFjayIpICsgDQojICBnZW9tX3ZsaW5lKGFlcyh4aW50ZXJjZXB0ID0geW1kKDIwMjExMDA0KSksIGNvbG9yID0gImJsYWNrIikgKyANCiAgIyAgZ2VvbV92bGluZShhZXMoeGludGVyY2VwdCA9IHltZCgyMDIyMDEwNSkpLCBjb2xvciA9ICJi
bGFjayIpICsgDQogIHRoZW1lX21pbmltYWwoKQ0KYGBgDQoNCmBgYHtyfQ0KZ2dwbG90bHkodmFyaWFudHNfcGxvdCkNCmBgYA0KDQpgYGB7cn0NCmNhc2VzX3Bsb3QNCmBgYA0KDQpgYGB7cn0NCmRlYXRoc19wbG90DQpgYGANCg0KYGBge3J9DQp1cyAlPiUgDQogIGdncGxvdChhZXMoeCA9IGRhdGUpKSArDQogIGdlb21fbGluZShhZXMoeSA9IHBlb3BsZV92YWNjaW5hdGVkX3Blcl9odW5kcmVkKSkgKw0KIyAgZ2VvbV92bGluZShhZXMoeGludGVyY2VwdCA9IHltZCgyMDIwMDcwNikpLCBjb2xvciA9ICJibGFjayIpICsNCiMgZ2VvbV92bGluZShhZXMoeGludGVyY2VwdCA9IHltZCgyMDIxMDUxNykpLCBjb2xvciA9ICJibGFjayIpICsgDQojIGdlb21fdmxpbmUoYWVzKHhpbnRlcmNlcHQgPSB5bWQoMjAyMTEwMDQpKSwgY29sb3IgPSAiYmxhY2siKSArIA0KIyAgICBnZW9tX3ZsaW5lKGFlcyh4aW50ZXJjZXB0ID0geW1kKDIwMjIwMTA1KSksIGNvbG9yID0gImJsYWNrIikgKyANCiAgdGhlbWVfbWluaW1hbCgpDQpgYGANCg0KYGBge3J9DQpjb3ZpZCRkYXRlW2xlbmd0aChjb3ZpZCRkYXRlKV0NClVTQUNsdXN0ZXJzJGRhdGVbbGVuZ3RoKFVTQUNsdXN0ZXJzJGRhdGUpXQ0KYGBgDQoNCmBgYHtyfQ0KDQpgYGANCg0KYGBge3J9DQptYXgodXMkbmV3X2Nhc2VzX3Blcl9taWxsaW9uLCBuYS5ybSA9IFRSVUUpDQpgYGANCg0KDQpgYGB7ciwgd2FybiA9IEZBTFNFfQ0KZ2dwbG90KHVzLCBhZXMoeCA9IGRhdGUpKSArIA0KICBnZW9tX2FyZWEoYWVzKHkgPSBwZXJjX3NlcXVlbmNlcywgY29sb3IgPSB2YXJpYW50LCBmaWxsID0gdmFyaWFudCksIGFscGhhID0gMC41LCBwb3NpdGlvbiA9ICJkb2RnZSIpICsgDQogIGdlb21fbGluZShhZXMoeSA9IG5ld19jYXNlc19wZXJfbWlsbGlvbiAvIDQwKSkgKyANCiAgc2NhbGVfeV9jb250aW51b3VzKCJQZXJjZW50IG9mIFNlcXVlbmNlcyIsIHNlYy5heGlzPXNlY19heGlzKH4uKjQwLCBuYW1lID0gIk5ldyBDYXNlcyBQZXIgTWlsbGlvbiIpKSArIA0KICB0aGVtZV9taW5pbWFsKCkgKyANCiAgbGFicyh0aXRsZSA9ICJQcm9wb3J0aW9uIG9mIENvdmlkIFZhcmlhbnRzIHZzIE5ldyBDYXNlcyBQZXIgTWlsbGlvbiIpIC0+IHANCg0KZ2dwbG90bHkocCkNCmBgYA0KDQo=